This notebook contains a code snippet that generates a pandas DataFrame summarizing a given knowledge assembly, split by a given annotation.
In [1]:
import os
import sys
import time
import pandas as pd
import pybel
from pybel.constants import VERSION as PYBEL_VERSION
from pybel_tools import selection
from pybel_tools.summary import info_json, info_list
from pybel_tools.mutation import infer_central_dogma
In [2]:
# Provenance: record the Python interpreter version this notebook was run with.
print(sys.version)
In [3]:
# Provenance: record the local date and time at which the notebook was executed.
print(time.asctime())
In [4]:
# Provenance: record the installed PyBEL version (pybel.constants.VERSION).
print(PYBEL_VERSION)
In [5]:
# Base directory for the input data, taken from the BMS_BASE environment
# variable. Raises KeyError if the variable is not set.
bms_base = os.environ['BMS_BASE']
In this example, we'll summarize the NeuroMMSig subgraphs in the Epilepsy Knowledge Assembly (Hoyt et al., 2018).
In [6]:
# Load the graph stored at $BMS_BASE/aetionomy/epilepsy/epilepsy.gpickle.
# NOTE(review): from_pickle presumably unpickles the file; unpickling can run
# arbitrary code, so only load files from a trusted source.
graph = pybel.from_pickle(os.path.join(bms_base, 'aetionomy', 'epilepsy', 'epilepsy.gpickle'))
# Print the graph as a quick sanity check of what was loaded.
print(graph)
In [7]:
# Called for its effect on `graph`; the return value is discarded.
# NOTE(review): presumably adds the inferred gene/RNA/protein relationships to
# the graph in place -- confirm against the pybel_tools documentation.
infer_central_dogma(graph)
In [8]:
# Split the graph by its 'Subgraph' annotation. The result is consumed later
# in the notebook as a mapping of subgraph name -> subgraph (via .items()).
subgraphs = selection.get_subgraphs_by_annotation(graph, 'Subgraph')
# Display how many subgraphs were found.
len(subgraphs)
Out[8]:
In [9]:
def fix_columns(df_):
    """Cast the count columns of a summary DataFrame to ``int``, in place.

    The columns 'Authors', 'Nodes', 'Edges', 'Citations' and 'Components'
    are each replaced by their integer-typed equivalent. All other columns
    are left untouched.

    :param df_: DataFrame containing at least the five columns listed above.
    :raises KeyError: if any of the expected columns is missing.
    :return: None; ``df_`` itself is modified.
    """
    for c in ['Authors', 'Nodes', 'Edges', 'Citations', 'Components']:
        # Column-by-column assignment so each column gets its own int dtype.
        df_[c] = df_[c].astype(int)
Using the info_json function, the nodes, edges, citations, authors, average degree, and network density of a graph are entered in a dictionary.
In [ ]:
In [10]:
# One info_json() summary dictionary per subgraph, keyed by the capitalized
# subgraph name.
data = {
    subgraph_name.capitalize(): info_json(subgraph)
    for subgraph_name, subgraph in subgraphs.items()
}
# Transpose so each subgraph becomes a row and each summary key a column,
# then cast the count columns to int (fix_columns works in place).
df = pd.DataFrame(data).T
fix_columns(df)

# Build a single 'Total' row summarizing the whole graph.
df_total = pd.DataFrame({'Total': info_json(graph)}).T
# The 'Compilation warnings' entry is left out of the totals row.
# errors='ignore' keeps the cell runnable when info_json() does not emit
# that key, instead of raising KeyError.
df_total = df_total.drop(columns=['Compilation warnings'], errors='ignore')
fix_columns(df_total)

# Append the totals row below the per-subgraph rows and display the result.
df = pd.concat([df, df_total])
df
Out[10]:
The DataFrame can be written to CSV, or to a wide variety of other formats, using pandas.
In [11]:
# Write the selected summary columns to a CSV file in the current user's
# Desktop folder. The file extension is '.csv' (the previous '.vcsv' was a typo).
path = os.path.join(os.path.expanduser('~'), 'Desktop', 'subgraph_summary.csv')
df[['Nodes', 'Edges', 'Components', 'Citations']].to_csv(path)